#########################################################################
# Figure4.R
#########################################################################

library(tidyverse)
library(ggpubr)
library(combinat)
library(stringr)
source("Utilities.R")

#########################################################################
# 1. Processing Data
#########################################################################

# Read the raw survey export and drop its first three rows, then add:
#   * id    -- running row index (assigned after the rows are dropped)
#   * C3_DO -- same text with double spaces collapsed to single spaces
#              (the double spaces come from Qualtrics)
dt <- read.csv("qualtrics_Alaska.csv")
dt <- dt[-(1:3), ] %>%
  mutate(
    id = row_number(),
    C3_DO = gsub("  ", " ", C3_DO, perl = TRUE)
  )



# Extract observed ranking responses

# Copy of `dt` in which every "|" has been removed from the columns whose
# names end in "_DO"; all other columns are left untouched
obs <- mutate(
  dt,
  across(ends_with("_DO"), function(txt) gsub("\\|", "", txt))
)


# Observed rankings for the four ranking questions (C3-C6).
# get_obs_rank() is not defined in this script (presumably it comes from
# Utilities.R -- confirm there); each call receives the name of the
# question's "_DO" column, the names of its four item columns, and `obs`.
# The "*_full" objects feed the "forced" format and the "*_part" objects
# the "option" format further down in this script.

# U.S. House, question C4
obs_house_full <- get_obs_rank(
  df = obs,
  order = "C4_DO",
  i_first = "C4_1",
  i_second = "C4_2",
  i_third = "C4_3",
  i_fourth = "C4_4"
)

# U.S. Senate, question C6
obs_senate_full <- get_obs_rank(
  df = obs,
  order = "C6_DO",
  i_first = "C6_1",
  i_second = "C6_2",
  i_third = "C6_3",
  i_fourth = "C6_4"
)

# U.S. House, question C3
obs_house_part <- get_obs_rank(
  df = obs,
  order = "C3_DO",
  i_first = "C3_1",
  i_second = "C3_2",
  i_third = "C3_3",
  i_fourth = "C3_4"
)

# U.S. Senate, question C5
obs_senate_part <- get_obs_rank(
  df = obs,
  order = "C5_DO",
  i_first = "C5_1",
  i_second = "C5_2",
  i_third = "C5_3",
  i_fourth = "C5_4"
)


# Filling in the last digit
#
# Each entry below maps a ranking string that contains the token "-99" to the
# same string with that token replaced by the digit 4 (the only digit missing
# from the other three positions). Only the strings listed here are recoded;
# every other value is left exactly as it is.
# NOTE(review): "-99" presumably marks the single item a respondent left
# unranked -- confirm against get_obs_rank() / the Qualtrics export.

house_fill <- c(
  "-99132" = "4132",
  "-99213" = "4213",
  "-99312" = "4312",
  "-99321" = "4321",
  "123-99" = "1234",
  "1-9932" = "1432",
  "13-992" = "1342",
  "132-99" = "1324",
  "2-9913" = "2413",
  "21-993" = "2143",
  "213-99" = "2134",
  "231-99" = "2314",
  "3-9912" = "3412",
  "31-992" = "3142",
  "312-99" = "3124",
  "32-991" = "3241",
  "321-99" = "3214"
)

senate_fill <- c(
  "-99123" = "4123",
  "-99231" = "4231",
  "-99321" = "4321",
  "1-9923" = "1423",
  "12-993" = "1243",
  "21-993" = "2143",
  "23-991" = "2341",
  "231-99" = "2314",
  "3-9912" = "3412",
  "312-99" = "3124",
  "32-991" = "3241",
  "321-99" = "3214"
)

# The recoded values never coincide with a key, so the order in which the
# entries are applied does not matter
for (partial in names(house_fill)) {
  obs_house_part[obs_house_part == partial] <- house_fill[[partial]]
}

for (partial in names(senate_fill)) {
  obs_senate_part[obs_senate_part == partial] <- senate_fill[[partial]]
}


#########################################################################
# 2. Panel A
#########################################################################

# Only keep observed rankings with four digits

# Flags for the "part" responses that are exactly four characters long
keep_house <- str_detect(obs_house_part, "^.{4}$")
keep_senate <- str_detect(obs_senate_part, "^.{4}$")

# 1 / 3: "part" rankings restricted to the flagged responses
# 2 / 4: "full" rankings, used as they are
cut_pattern1 <- obs_house_part[keep_house]
cut_pattern2 <- obs_house_full
cut_pattern3 <- obs_senate_part[keep_senate]
cut_pattern4 <- obs_senate_full


# Get the proportions of unique observed responses

# Share of each distinct ranking within one vector of responses.
#   patterns: vector of ranking strings
#   format:   label stored in column `Format`
#   race:     label stored in column `race`
# Returns a data frame with columns
#   pat (ranking), n (count / length(patterns)), Format, race,
#   N (length(patterns)).
pattern_shares <- function(patterns, format, race) {
  total <- length(patterns)
  shares <- as.data.frame(as_tibble(table(pat = patterns) / total))
  shares$Format <- format
  shares$race <- race
  shares$N <- total
  shares
}

g1 <- pattern_shares(cut_pattern1, format = "option", race = "U.S. House")
g2 <- pattern_shares(cut_pattern2, format = "forced", race = "U.S. House")
g3 <- pattern_shares(cut_pattern3, format = "option", race = "U.S. Senate")
g4 <- pattern_shares(cut_pattern4, format = "forced", race = "U.S. Senate")

# Stack the four tables and attach, for each share n out of N responses,
#   st = sqrt(n * (1 - n) / N)
#   up = n + 1.96 * st
#   lw = n - 1.96 * st, floored at 0
gdata <- rbind(g1, g2, g3, g4) %>%
  mutate(
    st = sqrt(n * (1 - n) * (1 / N)),
    up = n + 1.96 * st,
    lw = pmax(n - 1.96 * st, 0)
  )


# The 24 rankings split into the three groups used for colouring; the group
# labels stored in `type` are "donkey", "zigzag" and "others"
donkey_pats <- c("1234", "4321")
zigzag_pats <- c(
  "1324", "1423", "2143", "2314", "2413",
  "3142", "3241", "3412", "4132", "4231"
)
other_pats <- c(
  "1243", "1342", "1432", "2134", "2341", "2431",
  "3124", "3214", "3421", "4123", "4213", "4312"
)

# `type` is derived from the ranking string first; `pat` is then turned into
# a factor whose levels run donkey -> zigzag -> others
gdata <- mutate(
  gdata,
  type = case_when(
    pat %in% donkey_pats ~ "donkey",
    pat %in% zigzag_pats ~ "zigzag",
    TRUE ~ "others"
  ),
  pat = factor(pat, c(donkey_pats, zigzag_pats, other_pats))
)


# Generate Panel A 

# Panel A: one bar per ranking (axes flipped), dodged by Format and outlined
# by type, with one facet per race. The layers are grouped below and added in
# the same order as they are drawn.
# NOTE(review): the manual fill values are unnamed, so they are presumably
# assigned in level order of `Format` ("forced" = "#FFFFFF",
# "option" = "gray60") -- confirm that this agrees with the
# "Forced ranking" / "Optional ranking" labels and arrow colours.

# Dashed line at 1 / 4! plus two solid separators on the ranking axis
pA_guides <- list(
  geom_hline(
    yintercept = 1 / factorial(4),
    linetype = "dashed", linewidth = 0.3
  ),
  geom_vline(
    xintercept = 22.5,
    linetype = "solid", col = "#b0015a", linewidth = 0.3
  ),
  geom_vline(
    xintercept = 12.5,
    linetype = "solid", col = "#128ba0", linewidth = 0.3
  )
)

# Text labels placed inside the plotting area
pA_labels <- list(
  annotate("text",
    x = 23, y = 0.355, label = "Diagonal",
    size = 2.5, color = "#b0015a"
  ),
  annotate("text",
    x = 13.5, y = 0.35, label = "Zigzag",
    size = 2.5, color = "#128ba0"
  ),
  annotate("text",
    x = 1.5, y = 0.35, label = "Dogleg",
    size = 2.5, color = "#a5900d"
  ),
  annotate("text",
    x = 11.5, y = 0.18, label = "Forced ranking",
    size = 2, color = "black"
  ),
  annotate("text",
    x = 10.5, y = 0.18, label = "Optional ranking",
    size = 2, color = "black"
  ),
  annotate("text",
    x = 20, y = 0.18, label = "Uniform Distribution \n(No Pattern Ranking)",
    size = 2, color = "black"
  )
)

# Small arrows running from the labels towards the elements they describe
pA_arrows <- list(
  geom_segment(
    aes(x = 20, y = 0.1, xend = 20, yend = 0.05),
    arrow = arrow(length = unit(0.08, "cm")),
    col = "black", linewidth = 0.3
  ),
  geom_segment(
    aes(x = 11.5, y = 0.1, xend = 11.25, yend = 0.04),
    arrow = arrow(length = unit(0.08, "cm")),
    col = "gray60", linewidth = 0.3
  ),
  geom_segment(
    aes(x = 10.5, y = 0.1, xend = 10.75, yend = 0.04),
    arrow = arrow(length = unit(0.08, "cm")),
    col = "#a5900d", linewidth = 0.3
  )
)

pA <- ggplot(
  gdata,
  aes(x = reorder(pat, desc(pat)), y = n, fill = Format, color = type)
) +
  geom_bar(stat = "identity", position = "dodge2", linewidth = 0.3) +
  scale_color_manual(values = c("#b0015a", "#a5900d", "#128ba0")) +
  scale_fill_manual(values = c("#FFFFFF", "gray60")) +
  pA_guides +
  labs(x = "", y = "") +
  facet_wrap(vars(race)) +
  theme_bw() +
  pA_labels +
  pA_arrows +
  theme(
    legend.position = "none",
    plot.margin = margin(0.2, 0.2, 0.2, -0.2, "cm"),
    text = element_text(size = 8),
    panel.grid.major = element_blank()
  ) +
  ylim(0, 0.4) +
  coord_flip() +
  ggtitle("A. Proportion of Observed Rankings")

pA



# Appendix F
# Overall proportions by format, type, and race
# Both tables share the same race x Format x type grouping
gdata_by_cell <- group_by(gdata, race, Format, type)

# Mean of the shares `n` within each cell
xtable::xtable(summarise(gdata_by_cell, add = mean(n)))

# Sum of the shares `n` within each cell
xtable::xtable(summarise(gdata_by_cell, add = sum(n)))

#########################################################################
# 3. Panel B
#########################################################################

# Visualize all types of pattern ranking

# All permutations of 1:4, one per row (columns V1..V4)
rankings <- as.data.frame(do.call(rbind, permn(4)))

# One-column data frame (column `all`) holding each permutation as a string;
# it is not referenced again in the visible part of this script
title <- rankings %>% unite(col = all, sep = "")

# Height at which each position of a ranking is drawn: V1 on top, V4 at the
# bottom
row_of_position <- c(V1 = 4, V2 = 3, V3 = 2, V4 = 1)

# Long format: one row per (ranking, position) with
#   value    -- the ranking as a four-digit string
#   position -- "V1".."V4"
#   x        -- the digit found at that position
#   y        -- plotting height of that position
#   type     -- "donkey" / "zigzag" / "others"
gdat <- rankings %>%
  unite(col = value, sep = "", remove = FALSE) %>%
  pivot_longer(cols = !value, names_to = "position", values_to = "x") %>%
  mutate(
    y = unname(row_of_position[position]),
    type = case_when(
      value %in% c("1234", "4321") ~ "donkey",
      value %in% c(
        "1324", "1423", "2143", "2314", "2413",
        "3142", "3241", "3412", "4132", "4231"
      ) ~ "zigzag",
      TRUE ~ "others"
    )
  ) %>%
  arrange(type) %>%
  data.frame()


# Generate Panel B

# Panel B: one small facet per ranking, drawing the path traced through the
# four positions (x = digit, y = position height); colour and line type
# follow `type`. Facets are laid out six per row in the order donkey ->
# zigzag -> others.

# Bare look: minimal theme without legend, axis text, ticks or major grid
pB_theme <- list(
  theme_minimal(),
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    plot.margin = margin(0.2, 0.4, 0.2, 0.2, "cm"),
    text = element_text(size = 8),
    panel.grid.major = element_blank()
  )
)

pB <- ggplot(gdat, aes(x = x, y = y, colour = type)) +
  geom_point(size = 1, shape = 16) +
  geom_path(
    aes(linetype = type),
    linewidth = 0.5, linejoin = "mitre", lineend = "butt"
  ) +
  scale_color_manual(values = c("#b0015a", "#a5900d", "#128ba0")) +
  facet_wrap(
    ~ factor(value, c(
      "1234", "4321",
      "1324", "1423", "2143", "2314",
      "2413", "3142", "3241", "3412",
      "4132", "4231",
      "1243", "1342", "1432", "2134",
      "2341", "2431", "3124", "3214",
      "3421", "4123", "4213", "4312"
    )),
    ncol = 6
  ) +
  labs(x = "", y = "") +
  xlim(1, 4) +
  ylim(1, 4) +
  pB_theme +
  ggtitle("B. Types of Pattern Ranking")


pB



# Main Text (Section 3.2) -- Chi-square test
#  Null hypothesis: all unique rankings are equally likely
#
# The counts are tabulated over the complete set of 4! = 24 permutations, so
# a ranking that nobody submitted enters the test as a zero count. Calling
# table() on the raw strings would silently drop such rankings and test
# uniformity over the observed rankings only (with too few degrees of
# freedom). When all 24 rankings are observed the results are unchanged.

all_rankings <- sort(vapply(permn(4), paste, character(1), collapse = ""))

# Counts of `patterns` over all 24 permutations. Any value that is not one of
# the permutations keeps its own category (as it would with a plain table()),
# so no response is dropped silently; NAs are excluded, as before.
count_rankings <- function(patterns) {
  table(factor(patterns, levels = union(all_rankings, unique(patterns))))
}

chisq.test(count_rankings(cut_pattern1))
chisq.test(count_rankings(cut_pattern2))
chisq.test(count_rankings(cut_pattern3))
chisq.test(count_rankings(cut_pattern4))

# --> rejecting the null with significant result


#########################################################################
# 4. Panel C
#########################################################################

# Visualize people's submitted rankings across the optional and forced options
# Extracting Rs who submitted full rankings in both options

# The "full" vectors are subset with a mask computed on the "part" vectors,
# which is only meaningful if both hold one entry per respondent in the same
# order; stop early instead of silently recycling or padding the mask
stopifnot(
  length(obs_house_part) == length(obs_house_full),
  length(obs_senate_part) == length(obs_senate_full)
)

# Respondents whose "part" response is exactly four characters long
house_complete <- str_detect(obs_house_part, "^.{4}$")
senate_complete <- str_detect(obs_senate_part, "^.{4}$")

pattern1 <- obs_house_part[house_complete]
pattern2 <- obs_house_full[house_complete]
pattern3 <- obs_senate_part[senate_complete]
pattern4 <- obs_senate_full[senate_complete]

# Paired rankings, one row per retained respondent. A missing mask value
# yields a missing entry in the subsets above; filtering on `all_rank`
# removes such rows (filter() drops rows whose condition is NA).
gdax1 <- data.frame(pattern1, pattern2) %>%
  mutate(
    all_rank = str_detect(pattern1, "^.{4}$"),
    office = "U.S. House"
  ) %>%
  filter(all_rank == TRUE) %>%
  rename(
    optional = pattern1,
    forced = pattern2
  )

gdax2 <- data.frame(pattern3, pattern4) %>%
  mutate(
    all_rank = str_detect(pattern3, "^.{4}$"),
    office = "U.S. Senate"
  ) %>%
  filter(all_rank == TRUE) %>%
  rename(
    optional = pattern3,
    forced = pattern4
  )

# Proportions that Rs submit the same ranking pattern
mean(gdax1$optional == gdax1$forced) # US House
mean(gdax2$optional == gdax2$forced) # US Senate


# Both offices stacked, with a "Yes"/"No" flag for identical rankings
gdax <- rbind(gdax1, gdax2) %>%
  mutate(identical = ifelse(optional == forced, "Yes", "No"))


# Generate Panel C

# Panel C: each respondent's ranking in the optional question (x) against
# the forced question (y), jittered, one facet per office. Points are
# coloured by `identical`; the two manual colours are unnamed, so they are
# presumably assigned in level order ("No" = "slategray", "Yes" = "#b0015a").
# The seed is set immediately before the plot is built so that the jitter is
# reproducible.
set.seed(142)
pC <- ggplot(gdax, aes(x = optional, y = forced)) +
  geom_abline(lwd = 2, alpha = 0.3) +
  geom_jitter(
    aes(colour = identical),
    width = 0.5, height = 0.5,
    size = 1, shape = 16, alpha = 0.5
  ) +
  scale_color_manual(values = c("slategray", "#b0015a")) +
  facet_wrap(vars(office)) +
  theme_bw() +
  labs(
    x = "Optional ranking",
    y = "Forced ranking",
    title = "C. Observed Rankings in Forced and Optional Questions"
  ) +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none",
    plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"),
    text = element_text(size = 8),
    panel.grid.major = element_blank()
  )

pC


#########################################################################
# 5. Combining All Panels
#########################################################################

# Stack the three panels in a single column (relative heights A > B > C)
figure4 <- ggarrange(pA, pB, pC,
  ncol = 1, nrow = 3,
  heights = c(0.9, 0.75, 0.7)
)

# Display the combined figure, as before
figure4

# Pass the figure explicitly: without `plot =`, ggsave() writes whatever
# ggplot2 currently records as the last plot, which need not be the combined
# figure (e.g. when this script is run through source(), where top-level
# values are not printed)
ggsave("Figure4.pdf", plot = figure4, width = 5, height = 9)


#############################################################################
# END OF THIS R SOURCE FILE
#############################################################################
